#================================================#
# Machine Learning Lecture 2 in Python
# Author: Chong Ma
# Date : June 26, 2017
# Topic : Data Visualization in Python
#================================================#
#================================================#
# import Python library (just like library in R)
# most frequently used libraries: @_@ @_@ ...
# numpy, scipy, pandas, matplotlib, sympy etc.
#================================================#
# update jupyter notebook: pip install -U jupyter
import numpy as np
import scipy.stats as ss
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import pandas as pd
#=====================================================#
# ^_^ Data Visualization ^_^ #
#=====================================================#
#=====================================================#
# ^_^ histograms ^_^ #
#=====================================================#
## Simulate standard-normal data, plus a grid on which the theoretical
## density is evaluated for the overlay curve
x_seq = np.linspace(-4, 4, 100)
rndnorm1 = np.random.normal(0, 1, 10000)

# Create a figure instance that holds the two side-by-side subplots
fig = plt.figure(figsize=(12, 4))

# First subplot: matplotlib histogram with the N(0, 1) pdf drawn on top
ax1 = fig.add_subplot(121)  # equivalent to fig.add_subplot(1, 2, 1)
# `density=True` replaces the `normed=True` keyword, which matplotlib removed
# in 3.1; it puts the bars on the same scale as the pdf curve below.
ax1.hist(rndnorm1, bins=100, range=(-4, 4), density=True,
         color=(0.8, 0.6, 0.4, 0.75))
ax1.set_title("Histogram of Normal Distribution")
ax1.set_xlabel("x")
ax1.set_ylabel("Density")
ax1.text(-3, 0.35, r'$\mu=0,\ \sigma=1$')
ax1.grid(True)
ax1.plot(x_seq, ss.norm.pdf(x_seq), linewidth=2)

# Second subplot: seaborn histogram with a KDE curve and a rug
ax2 = fig.add_subplot(122)
# help(sns.distplot)
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; it is
# kept here so the figure looks the same as before.
sns.distplot(rndnorm1, bins=50, kde=True, rug=True,
             kde_kws={"color": "g", "lw": 2, "label": "KDE"},
             color=(0.8, 0.6, 0.4, 0.75), ax=ax2)
ax2.set_title("Histogram of Normal Distribution")
ax2.set_xlabel("x")
ax2.set_ylabel("Density")
ax2.set_xticks(np.arange(-6, 6, 1))
ax2.set_yticks(np.arange(0, 0.5, 0.1))
ax2.text(-3, 0.35, r'$\mu=0,\ \sigma=1$')
ax2.legend()
# To write the figure to disk, call plt.savefig("hist1.pdf") before plt.show()
plt.show()
#=====================================================#
# ^_^ scatter plots(matplotlib) ^_^ #
#=====================================================#
from scipy.stats import multivariate_normal

# Covariance matrix of the bivariate normal to simulate.  A plain ndarray is
# used because np.mat was removed in NumPy 2.0 and would also turn the
# product below into an np.matrix (whose column slices stay 2-D).
sigma = np.array([[1, 0.5], [0.5, 1]])

# Turn 500 pairs of independent N(0, 1) draws into correlated pairs.  With
# sigma = L @ L.T (Cholesky factorisation), z @ L.T has covariance sigma;
# multiplying by sigma itself would give covariance sigma @ sigma instead.
rndnorm2 = np.random.normal(0, 1, 1000).reshape(500, 2)
rndnorm2 = np.dot(rndnorm2, np.linalg.cholesky(sigma).T)

# Evaluation grid for the density: pos[i, j] = (x[i, j], y[i, j])
x, y = np.mgrid[-5:5:0.05, -5:5:0.05]
pos = np.dstack((x, y))
# Scatter the simulated pairs and overlay red contour lines of the bivariate
# normal density with mean (0, 0) and covariance `sigma`, evaluated on `pos`.
density = multivariate_normal([0,0], sigma).pdf(pos)
contour_levels = np.arange(0.001,0.4,0.02)

plt.figure()
plt.scatter(rndnorm2[:,0], rndnorm2[:,1])
plt.contour(x, y, density, levels=contour_levels, colors='r', alpha=0.6)
plt.show()
#=====================================================#
# ^_^ scatter plots(Seaborn) ^_^ #
#=====================================================#
# Wrap the simulated pairs in a DataFrame so seaborn can address the two
# columns by name.
rndnorm2 = pd.DataFrame(rndnorm2, columns=["x", "y"])

# Arguments shared by all three joint plots
joint_args = {"x": "x", "y": "y", "data": rndnorm2}

# default joint plot
s1 = sns.jointplot(**joint_args)

# hexbin plot, drawn under the "white" axes style
with sns.axes_style("white"):
    sns.jointplot(kind="hex", color="k", **joint_args)

# kernel density plot
sns.jointplot(kind="kde", **joint_args)

plt.show()
#=====================================================#
# ^_^ density plots(Seaborn) ^_^ #
#=====================================================#
# Two panels: contour-line KDE with marginal rugs (left) and a filled KDE
# with many levels (right).
f, (ax1, ax2) = plt.subplots(ncols=2, figsize=(12, 6))

# Current seaborn releases only accept the data vectors as keywords: a
# second positional argument raises TypeError, so x= / y= are spelled out.
sns.kdeplot(x=rndnorm2.x, y=rndnorm2.y, ax=ax1)
sns.rugplot(x=rndnorm2.x, color="g", ax=ax1)
# y= puts the rug on the vertical axis (replaces the deprecated vertical=True)
sns.rugplot(y=rndnorm2.y, ax=ax1)

cmap = sns.cubehelix_palette(as_cmap=True, dark=0, light=1, reverse=True)
# fill= and levels= replace the deprecated shade= and n_levels= keywords
sns.kdeplot(x=rndnorm2.x, y=rndnorm2.y, cmap=cmap, levels=60, fill=True,
            ax=ax2)
plt.show()
#=====================================================#
# ^_^ curves ^_^ #
#=====================================================#
def f1(t):
    """Return the damped oscillation exp(-t) * sin(2*pi*t) for scalar or array t."""
    return np.exp(-t) * np.sin(2*np.pi*t)


# Two samplings of [0, 5): step 0.1 (t1) and step 0.02 (t2)
t1, t2 = (np.arange(0.0, 5.0, step) for step in (0.1, 0.02))
# Evaluate the curves first, then draw them in two stacked panels of figure 1
coarse_values = f1(t1)
fine_values = f1(t2)
cosine_values = np.cos(2*np.pi*t2)

plt.figure(1)

# Upper panel: f1 as blue dots on the t1 grid and as a black line on t2
plt.subplot(2, 1, 1)
plt.plot(t1, coarse_values, 'bo', t2, fine_values, 'k')

# Lower panel: cos(2*pi*t) as a dashed red line
plt.subplot(2, 1, 2)
plt.plot(t2, cosine_values, 'r--')

plt.show()
#=====================================================#
# ^_^ boxplots(seaborn) ^_^ #
#=====================================================#
# Generate simulated delays for 5 categories, 100 draws each: category i is
# drawn from a normal with mean 10*i and standard deviation 5*(i+1).
# The per-category samples are concatenated directly rather than collected
# through a list comprehension used only for its side effect.
x = np.concatenate([np.random.normal(10*i, 5*(i+1), 100) for i in range(5)])

# One airline label per draw, in the same order as the samples above
y = np.repeat(["AA", "Delta", "VA", "JetBlue", "Hawaiian"], [100]*5)

# Convert to a pandas data frame
simdat = pd.DataFrame({"Delay": x, "Airlines": y})
# Global seaborn look used by the plots that follow
sns.set(style="ticks", palette="muted", color_codes=True)

# Horizontal box plot of delay per airline; whis=np.inf is passed so the
# whiskers are not cut off at a finite multiple of the IQR
box_options = {"whis": np.inf, "color": "c"}
ax = sns.boxplot(data=simdat, x="Delay", y="Airlines", **box_options)
plt.show()

# Violin plot of the same data, airlines on the horizontal axis
sns.violinplot(data=simdat, x="Airlines", y="Delay", palette="Set3")
plt.show()
#=====================================================#
# ^_^ bar plots ^_^ #
#=====================================================#
# (label, usage score, bar colour) - one row per programming language
_language_rows = [
    ('Python', 10, 'purple'),
    ('C++', 8, 'gold'),
    ('Java', 6, 'maroon'),
    ('Perl', 4, 'olive'),
    ('R', 2, 'navy'),
    ('SAS', 1, 'green'),
]
objects = tuple(row[0] for row in _language_rows)
performance = [row[1] for row in _language_rows]
colors = [row[2] for row in _language_rows]
# Integer positions 0..len(objects)-1, one per label
y_pos = np.arange(len(objects))
# One centred, semi-transparent bar per language, labelled on the x axis
bar_style = {"align": 'center', "alpha": 0.6, "color": colors}
plt.bar(y_pos, performance, **bar_style)
plt.title('Programming language usage')
plt.ylabel('Usage')
plt.xticks(y_pos, objects)
plt.show()
#=====================================================#
# ^_^ pie-chart ^_^ #
#=====================================================#
# Reuses the bar-chart data; only the first slice is pulled out of the pie
explode = (0.1,) + (0,) * 5
pie_style = {
    "explode": explode,
    "labels": objects,
    "colors": colors,
    "autopct": '%1.1f%%',
    "shadow": True,
    "startangle": 90,
}
plt.pie(performance, **pie_style)
plt.axis('equal')
plt.show()
# Small 4x3 grid over [-3, 3] x [-3, 3]; Z is the distance from the origin
xlist, ylist = (np.linspace(-3.0, 3.0, count) for count in (3, 4))
X, Y = np.meshgrid(xlist, ylist)
Z = np.sqrt(X*X + Y*Y)

## reset the print style: floats are shown with two decimals
np.set_printoptions(formatter={'float': '{:0.2f}'.format})

# Print the three arrays, each under its own heading
report = ["X=\n", X, "\n",
          "Y=\n", Y, "\n",
          "Z=\n", Z]
print(*report)
#=====================================================#
# ^_^ contour plot ^_^ #
#=====================================================#
# Coarse 4x3 grid; Z is the distance of each grid point from the origin
xlist = np.linspace(-3.0, 3.0, 3)
ylist = np.linspace(-3.0, 3.0, 4)
X, Y = np.meshgrid(xlist, ylist)
Z = np.sqrt(X**2 + Y**2)

plt.figure(figsize=(12, 4))

# Left panel: blue contour lines with inline labels
plt.subplot(1, 2, 1)
plt.title('Contour Plot')
plt.xlabel('x (cm)')
plt.ylabel('y (cm)')
cp1 = plt.contour(X, Y, Z, colors='b')
plt.clabel(cp1, inline=True, fontsize=10)

# Right panel: the same contours drawn as dashed black lines
plt.subplot(1, 2, 2)
plt.title("contour plot")
plt.xlabel("x (cm)")
plt.ylabel("y (cm)")
cp2 = plt.contour(X, Y, Z, colors="black", linestyles="dashed")
plt.clabel(cp2, inline=True, fontsize=12)

plt.show()
## Another way to show contour plot: filled contours on a 100x100 grid
xlist = np.linspace(-3.0, 3.0, 100)
ylist = np.linspace(-3.0, 3.0, 100)
X, Y = np.meshgrid(xlist, ylist)
Z = np.sqrt(X**2 + Y**2)


def _filled_panel(position, cmap, **contour_options):
    """Draw one filled-contour panel of Z with a colour bar and axis labels.

    `position` is the three-digit subplot code, `cmap` the colour map name,
    and any extra keywords (e.g. `levels`) are forwarded to plt.contourf.
    Returns the object produced by plt.contourf.
    """
    plt.subplot(position)
    filled = plt.contourf(X, Y, Z, cmap=cmap, **contour_options)
    plt.colorbar(filled)
    plt.title("contour plot")
    plt.xlabel("x (cm)")
    plt.ylabel("y (cm)")
    return filled


plt.figure(figsize=(16, 12))
# default levels
cp3 = _filled_panel(221, 'magma')
# explicit levels every 0.25
cp4 = _filled_panel(222, 'Greys', levels=np.arange(0,5,0.25))
# seven evenly spaced levels between 0 and 5
cp5 = _filled_panel(223, 'hot', levels=np.linspace(0,5,7))
# default levels with a different colour map
cp6 = _filled_panel(224, 'inferno')
plt.tight_layout()
# To write the figure to disk, call plt.savefig("foo.pdf") before plt.show()
plt.show()
#=====================================================#
# ^_^ heatmap(seaborn) ^_^ #
#=====================================================#
## a DataFrame with three variables:
## year, month and passengers
flights = sns.load_dataset("flights")
# Reshape to a month-by-year table of passenger counts.  The arguments are
# passed by keyword because pandas >= 2.0 rejects them positionally.
flights = flights.pivot(index="month", columns="year", values="passengers")
flights  # shows the table when run in a notebook cell; no effect in a script

# create a figure instance with four subplots
fig = plt.figure(figsize=(16, 12))
ax1 = fig.add_subplot(221)
ax2 = fig.add_subplot(222)
ax3 = fig.add_subplot(223)
ax4 = fig.add_subplot(224)

# four heatmap in different versions
sns.heatmap(flights, ax=ax1)
sns.heatmap(flights, annot=True, fmt="d", ax=ax2, cmap="viridis")
# The month labels depend on the seaborn version ("January" vs "Jan"), so the
# January row is looked up by prefix instead of by a hard-coded label.
january = next(m for m in flights.index if str(m).startswith("Jan"))
sns.heatmap(flights, center=flights.loc[january, 1955], ax=ax3, cmap="plasma")

# generate a random dataset
data = np.random.randn(50, 20)
sns.heatmap(data, xticklabels=2, yticklabels=False, ax=ax4)
plt.show()
#=====================================================#
# ^_^ Image Process ^_^ #
#=====================================================#
# Load SciPy's sample face image.  scipy.misc.face was removed in SciPy 1.12,
# so scipy.datasets is tried first; older installations (or ones where the
# datasets loader cannot be imported/used) fall back to scipy.misc.
try:
    from scipy import datasets
    face = datasets.face()
except ImportError:
    from scipy import misc
    face = misc.face()

# Shape, largest value and dtype of the image array.  max() must be called:
# printing `face.max` shows the bound method, not the value.
print(face.shape, "\n", face.max(), "\n", face.dtype)
## define a tint function that increase the lightness of
## the image
def tint(imag, percent):
    """Lighten an image by blending every value towards 1 (white).

    Parameters
    ----------
    imag : array-like
        Image values.  If any value is greater than 1 the data is divided by
        255 first; otherwise the values are used as they are.
    percent : float
        Blend weight: 0 returns the (possibly rescaled) image unchanged,
        1 returns an array of ones.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as `imag`, ``imag + (1 - imag) * percent``.
    """
    # Accept lists and other array-likes, not only ndarrays
    imag = np.asarray(imag)
    if np.any(imag > 1):
        imag = imag / 255
    return imag + (np.ones(imag.shape) - imag) * percent
# tint=lambda imag, percent: imag+(np.ones(imag.shape)-imag)*percent
# 2x2 grid of axis-free panels: the original image first, then three
# increasingly strong tints.  None marks the untouched original.
plt.figure()
for slot, strength in zip((221, 222, 223, 224), (None, 0.4, 0.6, 0.8)):
    plt.subplot(slot)
    plt.axis("off")
    if strength is None:
        plt.imshow(face)
    else:
        plt.imshow(tint(imag=face, percent=strength))
plt.show()